Aminet 2

home *** CD-ROM | disk | FTP | other *** search

/ Aminet 2 / Aminet AMIGA CDROM (1994)(Walnut Creek)[Feb 1994][W.O. 44790-1].iso / Aminet / text / hyper / guide2html.lha / AG2HTML.pl next >

Wrap

Perl Script | 1993-11-03 | 8KB | 212 lines

#!/usr/local/bin/perl
# This code is (c) 1993 to Michael Witbrock
# You may use it and modify and redistribute it freely,
# but you may not sell it in any way (including in disk collections)
# without first receiving my permission.
# If you significantly improve it, please let me know so that I can
# use the new version.
# You may contact me as witbrock@cs.cmu.edu
#
# AG2HTML.pl -- convert an AmigaGuide(TM) file into a set of HTML pages.
#
# Usage: AG2HTML.pl <file>.guide
#
# For an input named ROOT.guide the script creates the directory
# ROOT_Sections (if it does not exist yet) and writes one file
# ROOT_Sections/<node>.HTML for every @node ... @endnode section that
# follows the first @database line.  Progress and diagnostics are printed
# to standard output.
#
# The node text is emitted inside <pre> because many AG documents have
# button layouts which depend on the original spacing.
#
# Handled AmigaGuide constructs:
#   @database, @node, @endnode
#   @toc, @prev, @next, @help      -> button row at the bottom of the page
#   @{"label" link target}         -> <a href="target.HTML">label</a>
#   @{b} @{ub} @{i} @{ui}          -> <B> </B> <I> </I>
# Any other @{...} sequence is reported and hidden in an HTML comment.
#
# Changes: October 16 1993 MJW
#   Allow email addresses.
#   Translate & to &amp;, > to &gt;, < to &lt;
# Changes (review):
#   Restructured into subroutines; the duplicated node-copying code is gone.
#   No longer assigns to the read-only capture variable $2.
#   Escaped every literal '@' that Perl 5 would treat as an array.
#   chomp instead of chop; checked mkdir/open/close; pages are finished
#   properly even when the file ends without @endnode.

use strict;
use warnings;

# Footer appended to every generated page.  Single-quoted so that the '@'
# in the e-mail address is not taken for an array.
my $FOOTER = '<HR>HTML Conversion by AG2HTML.pl &amp; '
    . '<a href="http://www.cs.cmu.edu:8001/Web/People/mjw/mjwhome.html">'
    . 'witbrock@cs.cmu.edu</a>' . "\n";

# Navigation commands that may appear inside a node.  Each entry pairs the
# pattern that recognises the command (capturing the target node name) with
# the label of the button generated for it.  Order matters: the first
# pattern that matches a line wins.
my @NAV_BUTTONS = (
    [ qr/\@toc\s*"?([^"\s}]*)"?/i,  '[Contents]' ],
    [ qr/\@prev\s*"?([^"\s}]*)"?/i, '[Browse &lt;-]' ],
    [ qr/\@next\s*"?([^"\s}]*)"?/i, '[Browse -&gt;]' ],
    [ qr/\@help\s*"?([^"\s}]*)"?/i, '[Help]' ],
);

# Return a copy of $text with the HTML metacharacters &, > and < replaced
# by their entities.  '&' must be handled first so that the entities
# produced for '>' and '<' are not escaped a second time.
sub escape_html {
    my ($text) = @_;
    $text =~ s/&/&amp;/g;
    $text =~ s/>/&gt;/g;
    $text =~ s/</&lt;/g;
    return $text;
}

# Parse an "@node" line.
# Accepts  @node LABEL "title",  @node "label" "title",  @node LABEL  and
# @node "label".  Returns (name, title), with the title defaulting to
# "Untitled", or an empty list when the line holds no @node command.
sub parse_node_header {
    my ($line) = @_;
    return
        unless $line =~ m/\@node\s*(?:"([^"]*)"|([^"\s]*))\s*(?:"(.*)")?/i;
    my $name  = defined $1 ? $1 : $2;
    my $title = defined $3 ? $3 : 'Untitled';
    return ($name, $title);
}

# Create DIR/NAME.HTML, write the page header and leave the page open
# inside a <pre> block.  Returns (filehandle, path).  Dies if the file
# cannot be created.
sub open_node_page {
    my ($dir, $name, $title) = @_;
    my $path = "$dir/$name.HTML";

    # Remove a stale page first so that a read-only leftover from an
    # earlier run does not stop the conversion.
    unlink $path if -e $path;

    open(my $page, '>', $path) or die "Can't open $path to write $!\n";
    my $html_title = escape_html($title);
    print {$page} "<HTML>\n<TITLE>$html_title</TITLE>\n";
    print {$page} "<H1>$html_title</H1>\n<pre>\n";
    return ($page, $path);
}

# Finish a page: close the <pre> block, emit the row of navigation buttons
# (only when at least one was collected), append the footer and close the
# file.  Dies if the buffered output cannot be flushed.
sub close_node_page {
    my ($page, $path, $buttons) = @_;
    print {$page} "</pre>\n";
    print {$page} "<HR>$buttons" if $buttons ne '';
    print {$page} $FOOTER;
    close($page) or die "Can't finish writing $path: $!\n";
    return;
}

# Convert one line of node text to HTML and return it (without newline).
# Text is escaped first; AmigaGuide links and bold/italic switches are then
# turned into tags.  Anything else of the form @{...} is reported on
# standard output and wrapped in an HTML comment so it does not show up
# in the page.
sub convert_body_line {
    my ($line) = @_;
    $line = escape_html($line);

    # @{"label" link target}  ->  <a href="target.HTML">label</a>
    $line =~ s!\@\{\s*"([^"]*)"\s+link\s+"*([^"\s}]*)"*\s*\}!<a href="$2.HTML">$1</a>!gi;

    $line =~ s/\@\{b\}/<B>/gi;       # bold on
    $line =~ s/\@\{ub\}/<\/B>/gi;    # bold off
    $line =~ s/\@\{i\}/<I>/gi;       # italic on
    $line =~ s/\@\{ui\}/<\/I>/gi;    # italic off

    if ($line =~ m/\@\{/) {
        # Recognise and hide unhandled cases
        print "UNHANDLED?: $line\n";
        $line =~ s/(\@\{[^}]*\})/<!-- Unhandled AmigaGuide(TM) sequence "$1" -->/g;
    }
    return $line;
}

# ---------------------------------------------------------------------------
# Main program
# ---------------------------------------------------------------------------

if (@ARGV != 1) {
    die "Usage: $0 <AmigaGuideFile>\n"
        . " N.B. This program puts AG node HTML files in a dir.\n";
}

my $agname = $ARGV[0];
my $root;
if ($agname =~ m/(.*)\.guide/i) {
    $root = $1;
}
else {
    die "Error: $agname doesn't seem to be an AmigaGuide(TM) file.\n";
}

my $dirname = $root . "_Sections";
unless (-d $dirname) {
    mkdir($dirname, 0777) or die "Can't create directory $dirname: $!\n";
}

open(my $guide, '<', $agname) or die "Can't open $agname: $!\n";

my $database;               # label of the first @database line, once seen
my $seen_first_node = 0;    # true after the main (first) node was opened
my $page;                   # handle of the page being written, if any
my $page_path;              # file name belonging to $page
my $buttons = '';           # navigation links collected for $page

LINE:
while (my $line = <$guide>) {
    chomp $line;

    if (defined $page) {
        # Inside a node: copy text until @endnode.
        if ($line =~ m/\@endnode/i) {
            close_node_page($page, $page_path, $buttons);
            undef $page;
            next LINE;
        }

        # Remember TOC/prev/next/help links and drop the command line.
        for my $button (@NAV_BUTTONS) {
            my ($pattern, $label) = @{$button};
            if ($line =~ $pattern) {
                $buttons .= "<a href=\"$1.HTML\">$label</a> ";
                next LINE;
            }
        }

        # Otherwise it is links or plain text.
        print {$page} convert_body_line($line), "\n";
        next LINE;
    }

    # Outside a node.
    if (   $line =~ m/\@database\s*"(.*)"/i
        || $line =~ m/\@database\s*(\S*)/i)
    {
        my $label = $1;
        if (defined $database) {
            print "IGNORED: database label $label found after first one $database\n";
        }
        else {
            $database = $label;
            print "Database: $database\n";
        }
        next LINE;
    }

    unless (defined $database) {
        # stuff before first @database is ignored
        print "#SKIPPED while looking for database: $line\n";
        next LINE;
    }

    my ($name, $title) = parse_node_header($line);
    if (defined $name) {
        ($page, $page_path) = open_node_page($dirname, $name, $title);
        $buttons = '';
        unless ($seen_first_node) {
            print "NOTE: main node is $page_path\n";
            $seen_first_node = 1;
        }
        next LINE;
    }

    # Haven't yet found a @node; blank lines are not worth reporting.
    print "# SKIPPED while looking for \@node: $line \n" if $line =~ m/\S/;
}

# The guide ended without @endnode: still produce a complete page.
if (defined $page) {
    warn "WARNING: $agname ended inside a node; finishing $page_path\n";
    close_node_page($page, $page_path, $buttons);
}

close($guide);